import os
import re

import ddddocr
import requests

# Single OCR engine instance shared by every captcha recognition call.
ocr = ddddocr.DdddOcr(beta=True, show_ad=False)

# When true, each downloaded captcha image is also written to disk.
DEBUG = True

# HTTP headers attached to every request issued by this script.
headers = {
	"user-agent": (
		"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
		"AppleWebKit/537.36 (KHTML, like Gecko) "
		"Chrome/135.0.0.0 Safari/537.36"
	),
}
# res = requests.get("https://www.bjcourt.gov.cn/zxxx/indexOld.htm?st=1&zxxxlx=100013007&bzxrlx=&bzxrxm=&zrr=&frhqtzz=&jbfyId=&ah=&dqxh=26&page=11", headers=headers)
#
# print(res.text)


# img_res = requests.get("https://www.bjcourt.gov.cn/yzm.jpg?n=0", headers=headers)
#
# code = ocr.classification(img_res.content)
# print(f"原始code {code}  ")
# code = code[-5:]
# print(f"处理code {code}  ")

# if DEBUG:
# 	with open(f"./codes/bjfy_{code}.png", "wb") as f:
# 		f.write(img_res.content)


# post_res = requests.post("https://www.bjcourt.gov.cn/cpws/checkkaptcha.htm", data={
# 	"yzm": 111
# }, headers=headers)
#
# print(post_res.url)


# One session is reused for the listing request, the captcha image request and
# the captcha submission (presumably so the site's cookies carry over between
# them — confirm against the site's behavior).
session = requests.Session()

# Seconds to wait on any single HTTP call; requests has no default timeout, so
# without this a stalled connection would block the script forever.
REQUEST_TIMEOUT = 10
# Directory that receives captcha images when DEBUG is enabled.
CAPTCHA_DUMP_DIR = "./codes"
# Compiled once because it is applied to every successfully fetched page.
TITLE_PATTERN = re.compile(r'<title>(.*?)</title>', re.S)

# Walk listing pages 1..438. The page counter only advances after a page was
# fetched without a captcha prompt, so a challenged page is requested again
# once the captcha loop below exits.
page = 1
while page < 439:
	url = f"https://www.bjcourt.gov.cn/zxxx/indexOld.htm?st=1&zxxxlx=100013007&bzxrlx=&bzxrxm=&zrr=&frhqtzz=&jbfyId=&ah=&dqxh=26&page={page}"
	res = session.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
	# "验证码" ("verification code") in the body marks a captcha challenge page.
	if "验证码" in res.text:
		while True:
			print("开始破解验证码  ......")
			img_res = session.get(
				"https://www.bjcourt.gov.cn/yzm.jpg?n=0",
				headers=headers,
				timeout=REQUEST_TIMEOUT,
			)
			code = ocr.classification(img_res.content)
			print(f"原始code {code}  ")
			# Only the trailing five characters of the OCR output are submitted.
			code = code[-5:]
			print(f"处理code {code}  ")

			if DEBUG:
				# Create the dump directory on demand; opening a file inside a
				# missing directory would raise FileNotFoundError.
				os.makedirs(CAPTCHA_DUMP_DIR, exist_ok=True)
				with open(f"{CAPTCHA_DUMP_DIR}/bjfy_{code}.png", "wb") as f:
					f.write(img_res.content)

			# Redirects are not followed so the raw status code can be inspected.
			post_res = session.post(
				"https://www.bjcourt.gov.cn/cpws/checkkaptcha.htm",
				data={"yzm": code},
				headers=headers,
				allow_redirects=False,
				timeout=REQUEST_TIMEOUT,
			)

			# NOTE(review): the loop retries while the check endpoint answers
			# 302 and stops on any other status — presumably 302 means the
			# code was rejected; confirm against the site.
			if post_res.status_code != 302:
				break
	else:
		# Guard the match: a response without a <title> element would make
		# re.search return None and the former .group() call crash.
		title_match = TITLE_PATTERN.search(res.text)
		title = title_match.group() if title_match else "<title> not found"
		print(f"正常获取数据{res.url}页..................", title)
		page += 1


